# Load in some packages
import calendar
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

fuel_df = pd.read_csv(r"C:\Users\jki\Downloads\Real-World_Fuel_Efficiency.csv")
fuel_df


fuel_df.describe()


# lets  check for missing 
missing_values =  fuel_df.isna().sum()
print(missing_values)

YEAR                                  0
MAKE                                  0
MODEL                                 0
Hybrid/Non-Hybrid                     0
STANDARD TYPE                         0
EPA RATING (CITY)                     0
Vehicle Count                         0
TOTAL ACTUAL MILES                    0
TOTAL ACTUAL FUEL                     0
EPA EXPECTED FUEL                     0
ACTUAL FUEL ECONOMY Geotab            0
PERCENT % DIFFERENCE ACTUAL TO EPA    0
ESTIMATED FUEL COSTS PER GALLON       0
FUEL COSTS PER MILE                   0
dtype: int64


# lets check on data types
fuel_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106 entries, 0 to 105
Data columns (total 14 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   YEAR                                106 non-null    int64  
 1   MAKE                                106 non-null    object 
 2   MODEL                               106 non-null    object 
 3   Hybrid/Non-Hybrid                   106 non-null    object 
 4   STANDARD TYPE                       106 non-null    object 
 5   EPA RATING (CITY)                   106 non-null    int64  
 6   Vehicle Count                       106 non-null    int64  
 7   TOTAL ACTUAL MILES                  106 non-null    int64  
 8   TOTAL ACTUAL FUEL                   106 non-null    float64
 9   EPA EXPECTED FUEL                   106 non-null    float64
 10  ACTUAL FUEL ECONOMY Geotab          106 non-null    float64
 11  PERCENT % DIFFERENCE ACTUAL TO EPA  106 non-null    float64
 12  ESTIMATED FUEL COSTS PER GALLON     106 non-null    object 
 13  FUEL COSTS PER MILE                 106 non-null    object 
dtypes: float64(4), int64(4), object(6)
memory usage: 11.7+ KB


# Plot the Fuel Consumtion for each Car Model

car_make =fuel_df.groupby('MAKE').sum()['TOTAL ACTUAL FUEL'] 

# Sort the values in descending order and select the top Car Makes
top_ten_car_make= car_make.sort_values(ascending=False).head(10)

# Display the result
print(top_ten_car_make)

# Plot the top ten Countries
top_ten_car_make.plot(kind='bar', color='blue',figsize=(10, 6))
plt.title('Top Ten Car Make Fuel Consumption')
plt.xlabel('Car Make')
plt.ylabel('Fuel Consumption')
plt.show()

MAKE
FORD            477336.5
TOYOTA          222598.0
CHEVROLET       183929.0
DODGE             5199.0
NISSAN            5037.0
GMC               3243.0
FREIGHTLINER       296.0
HONDA              221.0
Name: TOTAL ACTUAL FUEL, dtype: float64


# Plot the Fuel Consumtion for each Car Model

car_make =fuel_df.groupby('MAKE').sum()['TOTAL ACTUAL FUEL'] 

# Sort the values in descending order and select the top Car Makes
bottom_ten_car_make= car_make.sort_values(ascending=True).head(10)

# Display the result
print(bottom_ten_car_make)

# Plot the top ten Countries
bottom_ten_car_make.plot(kind='bar', color='blue',figsize=(10, 6))
plt.title('Top Ten Car Make Fuel Consumption')
plt.xlabel('Car Make')
plt.ylabel('Fuel Consumption')
plt.show()

MAKE
HONDA              221.0
FREIGHTLINER       296.0
GMC               3243.0
NISSAN            5037.0
DODGE             5199.0
CHEVROLET       183929.0
TOYOTA          222598.0
FORD            477336.5
Name: TOTAL ACTUAL FUEL, dtype: float64


# Plot the Fuel Consumtion for each Car Model

car_model =fuel_df.groupby('MODEL').sum()['TOTAL ACTUAL FUEL']

# Sort the values in descending order and select the top Car Makes
top_ten_car_model= car_model.sort_values(ascending=False).head(10)

# Display the result
print(top_ten_car_model)

# Plot the top ten Countries
top_ten_car_model.plot(kind='bar', color='green',figsize=(10, 6))
plt.title('Top Ten Car Model Fuel Consumption')
plt.xlabel('Car Model')
plt.ylabel('Fuel Consumption')
plt.show()

MODEL
PRIUS              145517.0
EXPRESS            118350.0
F150                99767.5
ESCAPE              89590.0
EXPLORER            41198.0
RAV4 HYBRID         40028.0
F250                38613.0
FUSION HYBRID       37974.0
TRANSIT CONNECT     27652.0
TAURUS              26022.0
Name: TOTAL ACTUAL FUEL, dtype: float64


# Plot the Fuel Consumtion for each Car Model

car_model =fuel_df.groupby('MODEL').sum()['TOTAL ACTUAL FUEL']

# Sort the values in descending order and select the top Car Makes
bottom_ten_car_model= car_model.sort_values(ascending=True).head(10)

# Display the result
print(bottom_ten_car_model)

# Plot the top ten Countries
bottom_ten_car_model.plot(kind='bar', color='green',figsize=(10, 6))
plt.title('Bottom Ten Car Model Fuel Consumption')
plt.xlabel('Car Model')
plt.ylabel('Fuel Consumption')
plt.show()

MODEL
CIVIC GX             221.0
SPRINTER 3500        296.0
AVALON HYBRID        569.0
FOCUS                793.0
RANGER               845.0
CHARGER             1061.0
YUKON               1064.0
CROWN VIC           2061.0
YUKON XL            2179.0
SILVERADO HYBRID    4125.0
Name: TOTAL ACTUAL FUEL, dtype: float64


# Plot the Fuel Cocumption for hybrid cars and non hybrid cars

hybrid_category =fuel_df.groupby('Hybrid/Non-Hybrid').sum()['TOTAL ACTUAL FUEL']

# Plot the pie chart
hybrid_category.plot(kind='pie', autopct='%1.1f%%', colors=['skyblue', 'lightgreen'], figsize=(8, 8))
plt.title('Fuel Consumption for Hybrid and Non-Hybrid Cars')
plt.ylabel('')  # No need for ylabel in pie chart
plt.show()


# Plot the Fuel Consumtion for each Car Model

car_body_type =fuel_df.groupby('STANDARD TYPE').sum()['TOTAL ACTUAL FUEL']

# Sort the values in descending order and select the top Body type
top_ten_car_body_type= car_body_type.sort_values(ascending=False).head(10)

# Display the result
print(top_ten_car_body_type)

# Plot the top ten Countries
top_ten_car_body_type.plot(kind='bar', color='brown',figsize=(10, 6))
plt.title(' Car body type  Fuel Consumption')
plt.xlabel('Car body type ')
plt.ylabel('Fuel Consumption')
plt.show()

STANDARD TYPE
SUV       274136.0
SEDAN     250728.0
VAN       207768.0
PICKUP    165227.5
Name: TOTAL ACTUAL FUEL, dtype: float64


fuel_df.head(5)


# Selecting data for Honda
Ford_data = fuel_df[fuel_df['MAKE'] == "FORD"]

# Extracting specific columns
Fordf1 = Ford_data ['Vehicle Count']
Fordf2 = Ford_data ['TOTAL ACTUAL FUEL']

# Checking if both conditions hold true
Ford_filtered_data = Ford_data[(Fordf1.notnull()) & (Fordf2.notnull())]

Ford_filtered_data.head(5)


Ford_filtered_data.describe()


import plotly.express as px

# Plot relationship between life expectancy and years
fig = px.bar(Ford_filtered_data, x='YEAR', y='TOTAL ACTUAL FUEL', height=320, labels={'TOTAL ACTUAL FUEL':'Fuel Consumption'}, title='Relationship between Fuel Consumption and Years - Ford')
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability

# Show the plot
fig.show()


# Selecting data for Honda
PRIUS_data = fuel_df[fuel_df['MODEL'] == "PRIUS"]

# Extracting specific columns
PRIUSf1 = PRIUS_data['Vehicle Count']
PRIUSf2 = PRIUS_data['TOTAL ACTUAL FUEL']

# Checking if both conditions hold true
PRIUS_filtered_data = PRIUS_data[(PRIUSf1.notnull()) & (PRIUSf2.notnull())]

PRIUS_filtered_data.head(5)


import plotly.express as px

# Plot relationship between life expectancy and years
fig = px.bar(PRIUS_filtered_data, x='YEAR', y='TOTAL ACTUAL FUEL', height=320, labels={'TOTAL ACTUAL FUEL':'Fuel Consumption'}, title='Relationship between Fuel Consumption and Years - PRIUS')
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability

# Show the plot
fig.show()


# Selecting data for Honda
SUV_data = fuel_df[fuel_df['STANDARD TYPE'] == "SUV"]

# Extracting specific columns
SUVf1 = SUV_data['Vehicle Count']
SUVf2 = SUV_data['TOTAL ACTUAL FUEL']

# Checking if both conditions hold true
SUV_filtered_data = SUV_data[(SUVf1.notnull()) & (SUVf2.notnull())]

SUV_filtered_data.head(3)


# Plot relationship between life expectancy and years
fig = px.bar(SUV_filtered_data, x='YEAR', y='TOTAL ACTUAL FUEL', height=320, labels={'TOTAL ACTUAL FUEL':'Fuel Consumption'}, title='SUV Fuel Consuption Histogram - SUVS')
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability

# Show the plot
fig.show()

	YEAR	MAKE	MODEL	Hybrid/Non-Hybrid	STANDARD TYPE	EPA RATING (CITY)	Vehicle Count	TOTAL ACTUAL MILES	TOTAL ACTUAL FUEL	EPA EXPECTED FUEL	ACTUAL FUEL ECONOMY Geotab	PERCENT % DIFFERENCE ACTUAL TO EPA	ESTIMATED FUEL COSTS PER GALLON	FUEL COSTS PER MILE
0	2008	HONDA	CIVIC GX	Non-Hybrid	SEDAN	24	9	4459	221.0	185.79	20.18	-0.16	$ 552.50	$ 0.12
1	2008	FORD	CROWN VIC	Non-Hybrid	SEDAN	11	7	17162	1771.0	1560.18	9.69	-0.12	$ 4,427.50	$ 0.26
2	2008	CHEVROLET	EXPRESS	Non-Hybrid	VAN	13	1	413	65.0	31.77	6.35	-0.51	$ 162.50	$ 0.39
3	2008	FORD	FOCUS	Non-Hybrid	SEDAN	24	49	13074	793.0	544.75	16.49	-0.31	$ 1,982.50	$ 0.15
4	2008	FORD	FUSION	Non-Hybrid	SEDAN	18	5	29482	1813.0	1637.89	16.26	-0.10	$ 4,532.50	$ 0.15
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
101	2019	CHEVROLET	EXPRESS	Non-Hybrid	VAN	13	22	10316	885.0	793.54	11.66	-0.10	$ 2,212.50	$ 0.21
102	2019	FORD	F150	Non-Hybrid	PICKUP	16	27	56410	5419.0	3525.63	10.41	-0.35	$ 13,547.50	$ 0.24
103	2019	FORD	F250	Non-Hybrid	PICKUP	12	30	84521	5449.0	7043.42	15.51	0.29	$ 13,622.50	$ 0.16
104	2019	FREIGHTLINER	SPRINTER 3500	Non-Hybrid	VAN	15	38	3277	296.0	218.47	11.07	-0.26	$ 740.00	$ 0.23
105	2019	FORD	TRANSIT VAN	Non-Hybrid	VAN	20	7	29068	2331.0	1453.40	12.47	-0.38	$ 5,827.50	$ 0.20

	YEAR	EPA RATING (CITY)	Vehicle Count	TOTAL ACTUAL MILES	TOTAL ACTUAL FUEL	EPA EXPECTED FUEL	ACTUAL FUEL ECONOMY Geotab	PERCENT % DIFFERENCE ACTUAL TO EPA
count	106.000000	106.000000	106.000000	1.060000e+02	106.000000	106.000000	106.000000	106.000000
mean	2013.698113	23.254717	38.320755	1.742588e+05	8470.372642	6903.022358	19.883491	-0.166321
std	3.165799	11.495806	51.139128	2.837817e+05	9212.284006	7641.761732	12.329095	0.226229
min	2008.000000	9.000000	1.000000	4.130000e+02	65.000000	31.770000	5.350000	-0.670000
25%	2011.000000	15.000000	10.000000	3.217475e+04	1765.750000	1412.797500	10.545000	-0.317500
50%	2014.000000	19.000000	20.500000	8.400450e+04	5365.500000	4511.125000	14.930000	-0.160000
75%	2016.000000	28.000000	45.750000	1.800435e+05	11911.500000	9723.207500	26.090000	-0.040000
max	2019.000000	55.000000	327.000000	2.144639e+06	44874.000000	44679.980000	54.580000	0.730000

	YEAR	EPA RATING (CITY)	Vehicle Count	TOTAL ACTUAL MILES	TOTAL ACTUAL FUEL	EPA EXPECTED FUEL	ACTUAL FUEL ECONOMY Geotab	PERCENT % DIFFERENCE ACTUAL TO EPA
count	55.00000	55.000000	55.000000	55.000000	55.000000	55.000000	55.000000	55.000000
mean	2013.60000	19.981818	29.163636	124784.927273	8678.845455	6577.763636	15.788364	-0.209818
std	3.18329	8.154733	36.182031	140896.517633	9032.667358	6769.011186	8.346563	0.221138
min	2008.00000	10.000000	5.000000	3263.000000	289.000000	203.940000	6.620000	-0.560000
25%	2011.00000	15.000000	9.000000	32502.000000	1897.000000	1449.540000	10.425000	-0.380000
50%	2014.00000	18.000000	16.000000	83488.000000	5681.000000	4538.200000	12.470000	-0.210000
75%	2016.00000	21.500000	30.500000	161921.500000	10707.750000	8714.580000	16.695000	-0.090000
max	2019.00000	43.000000	189.000000	689589.000000	40809.000000	32837.570000	41.460000	0.520000

	YEAR	MAKE	MODEL	Hybrid/Non-Hybrid	STANDARD TYPE	EPA RATING (CITY)	Vehicle Count	TOTAL ACTUAL MILES	TOTAL ACTUAL FUEL	EPA EXPECTED FUEL	ACTUAL FUEL ECONOMY Geotab	PERCENT % DIFFERENCE ACTUAL TO EPA	ESTIMATED FUEL COSTS PER GALLON	FUEL COSTS PER MILE
6	2008	TOYOTA	PRIUS	Hybrid	SEDAN	48	189	1034445	25453.0	21550.94	40.64	-0.15	$ 63,632.50	$ 0.06
13	2009	TOYOTA	PRIUS	Hybrid	SEDAN	48	86	341538	8249.0	7115.38	41.40	-0.14	$ 20,622.50	$ 0.06
18	2010	TOYOTA	PRIUS	Hybrid	SEDAN	51	84	266257	5760.0	5220.73	46.23	-0.09	$ 14,400.00	$ 0.05
26	2011	TOYOTA	PRIUS	Hybrid	SEDAN	49	86	374960	7968.0	7652.24	47.06	-0.04	$ 19,920.00	$ 0.05
38	2012	TOYOTA	PRIUS	Hybrid	SEDAN	49	15	55592	1184.0	1134.53	46.95	-0.04	$ 2,960.00	$ 0.05

Return Home

Return to Python Projects Page

City of New York Real-World Fuel Efficiency Exploratory Data Analysis¶

Research Question :¶

1.Display top 10 Car Makes with the highest Fuel Consumption¶

2.Display bottom 10 Car Makes with the highest Fuel Consumption¶

3.Display top 10 Car Models with the highest Fuel Consumption¶

4.Display bottom 10 Car Models with the highest Fuel Consumption¶

5.Display Hybrid Cars Vs Non Hybrid Cars Distribution¶

6.Display Car body type Fuel Consumption¶

7 Perform Case Studies on Outliers¶

8 .Provide conclusions after EDA¶

1 Display top 10 Car Makes with the highest Fuel Consumption¶

2 Display bottom 10 Car Makes with the highest Fuel Consumption¶

3 Display top 10 Car Models with the highest Fuel Consumption¶

4 Display bottom 10 Car Models with the highest Fuel Consumption¶

5 Display Hybrid Cars Vs Non Hybrid Cars Distribution¶

5 Display Car body type Fuel Consumption¶

Case Study 1: Ford¶

Case Study 2: PRIUS¶

Case Study 3: SUV Body type¶

Conclusion¶

	YEAR	MAKE	MODEL	Hybrid/Non-Hybrid	STANDARD TYPE	EPA RATING (CITY)	Vehicle Count	TOTAL ACTUAL MILES	TOTAL ACTUAL FUEL	EPA EXPECTED FUEL	ACTUAL FUEL ECONOMY Geotab	PERCENT % DIFFERENCE ACTUAL TO EPA	ESTIMATED FUEL COSTS PER GALLON	FUEL COSTS PER MILE
5	2008	TOYOTA	HIGHLANDER HYBRID	Hybrid	SUV	27	15	36095	1667.0	1336.85	21.65	-0.20	$ 4,167.50	$ 0.12
10	2009	FORD	ESCAPE	Non-Hybrid	SUV	19	14	3903	430.0	205.42	9.08	-0.52	$ 1,075.00	$ 0.28
11	2009	FORD	ESCAPE HYBRID	Hybrid	SUV	34	32	180992	8729.0	5323.29	20.73	-0.39	$ 21,822.50	$ 0.12